!pip install wordcloud
import pandas as pd
import numpy as np
import scipy.stats as scs
import statsmodels.api as sm
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
%matplotlib inline
%config InlineBackend.figure_format='retina'
# Load the description / claim / code table.
df = pd.read_csv('small_descr_clm_code.csv')
# NOTE(review): 'Unnamed: 0' is presumably a saved index column -- confirm against the CSV.
df.drop('Unnamed: 0', axis=1, inplace=True)
df.head()

# Merge the two text columns into one. A single space is inserted between them so
# that the last word of `descr` and the first word of `clm` remain separate tokens
# (plain `descr + clm` glued them into one bogus word).
df['descr_clm'] = df['descr'] + ' ' + df['clm']
df.drop(['descr', 'clm'], axis=1, inplace=True)
df['code'] = df['code'].astype('category')
df.head()

# Rows with code 705.
df_705 = df[df['code'] == 705]
df_705.head()

# Extra words to exclude from the word clouds (duplicates removed).
# NOTE(review): the two-word entries ('subject matter', 'presently disclosed') are kept
# as in the original list, but they likely never match a single token -- confirm against
# wordcloud's stop-word handling.
custom_stopword_list = [
    'wherein', 'subject matter', 'subject', 'matter', 'first', 'second',
    'include', 'includes', 'comprise', 'said', 'disclosed',
    'presently disclosed', 'system', 'process', 'method', 'one', 'may',
    'claim', 'embodiment', 'invention', 'example', 'step', 'figure', 'fig',
]
`stopwords` is a set. `set.add()` adds a single element, so it did not work for a whole list of words. To add a list, take the union of the set with `set(list)`, using the `|` operator (or `|=` to update in place).
# Build the stop-word set as a NEW set. `stopwords = STOPWORDS` followed by `|=` would
# update wordcloud's module-level STOPWORDS object in place.
stopwords = STOPWORDS.union(custom_stopword_list)

text = df_705.descr_clm.values
# Join the individual documents into one string. `str(text)` must not be used here:
# it yields the *printed representation* of the NumPy array, which is elided with
# '...' once the array has more than 1000 elements and contains quotes and escape
# sequences instead of the raw text. Missing values are skipped.
corpus_705 = ' '.join(df_705['descr_clm'].dropna().astype(str))

wordcloud_705 = WordCloud(
    width=3000,
    height=2000,
    background_color='black',
    stopwords=stopwords,
).generate(corpus_705)

# Render the cloud on a black figure without axes or padding.
fig = plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
plt.imshow(wordcloud_705, interpolation='bilinear')
plt.axis('off')
plt.tight_layout(pad=0)
plt.show()
# Rows with code 706.
df_706 = df[df['code'] == 706]
df_706.head()

text = df_706.descr_clm.values
# Join the individual documents into one string. `str(text)` must not be used here:
# it yields the *printed representation* of the NumPy array, which is elided with
# '...' once the array has more than 1000 elements and contains quotes and escape
# sequences instead of the raw text. Missing values are skipped.
corpus_706 = ' '.join(df_706['descr_clm'].dropna().astype(str))

wordcloud_706 = WordCloud(
    width=3000,
    height=2000,
    background_color='black',
    stopwords=stopwords,
).generate(corpus_706)

# Render the cloud on a black figure without axes or padding.
fig = plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
plt.imshow(wordcloud_706, interpolation='bilinear')
plt.axis('off')
plt.tight_layout(pad=0)
plt.show()